View Javadoc
1   package edu.jiangxin.apktoolbox.file.core;
2   
3   import java.io.File;
4   import java.io.FileInputStream;
5   import java.io.IOException;
6   import java.nio.charset.Charset;
7   import java.util.concurrent.CountDownLatch;
8   
9   import org.apache.commons.lang3.StringUtils;
10  import org.apache.logging.log4j.LogManager;
11  import org.apache.logging.log4j.Logger;
12  import org.mozilla.universalchardet.UniversalDetector;
13  
14  import info.monitorenter.cpdetector.io.ASCIIDetector;
15  import info.monitorenter.cpdetector.io.CodepageDetectorProxy;
16  import info.monitorenter.cpdetector.io.JChardetFacade;
17  import info.monitorenter.cpdetector.io.ParsingDetector;
18  import info.monitorenter.cpdetector.io.UnicodeDetector;
19  
20  /**
21   * @author jiangxin
22   * @author 2018-09-09
23   *
24   */
25  public class EncoderDetector {
26      private static final Logger logger = LogManager.getLogger(EncoderDetector.class.getSimpleName());
27      private static String[] detectorCharsets;
28      private static CountDownLatch countDownLatch;
29  
30      /**
31       * Detect the charset of some file
32       * 
33       * @param fileName
34       * @return charset
35       */
36      public static String judgeFile(String fileName) {
37          File file = new File(fileName);
38          if (!file.exists()) {
39              logger.error("Can't find the file: " + fileName);
40              return null;
41          }
42  
43          detectorCharsets = new String[2];
44          countDownLatch = new CountDownLatch(2);
45  
46          Thread cpDetectorThread = new Thread(new Runnable() {
47              @Override
48              public void run() {
49                  CodepageDetectorProxy cpDetectorProxy = CodepageDetectorProxy.getInstance();
50  
51                  // first one returning non-null wins the decision
52                  cpDetectorProxy.add(new ParsingDetector(false));
53                  cpDetectorProxy.add(JChardetFacade.getInstance());
54                  cpDetectorProxy.add(ASCIIDetector.getInstance());
55                  cpDetectorProxy.add(UnicodeDetector.getInstance());
56                  Charset charset = null;
57                  try {
58                      // f.toURL()已经废弃,建议通过toURI()间接转换
59                      charset = cpDetectorProxy.detectCodepage(file.toURI().toURL());
60                  } catch (IOException e) {
61                      logger.error("cpDetector failed", e);
62                      detectorCharsets[0] = null;
63                  }
64                  if (charset != null) {
65                      detectorCharsets[0] = charset.name();
66                  } else {
67                      detectorCharsets[0] = null;
68                  }
69                  countDownLatch.countDown();
70              }
71          });
72  
73          Thread universalDetectorThread = new Thread(new Runnable() {
74              @Override
75              public void run() {
76                  UniversalDetector universalDetector = new UniversalDetector(null);
77                  byte[] buf = new byte[4096];
78                  FileInputStream fis = null;
79                  try {
80                      fis = new FileInputStream(file);
81                      int nread;
82                      while ((nread = fis.read(buf)) > 0 && !universalDetector.isDone()) {
83                          universalDetector.handleData(buf, 0, nread);
84                      }
85                      universalDetector.dataEnd();
86                      detectorCharsets[1] = universalDetector.getDetectedCharset();
87                  } catch (IOException e) {
88                      logger.error("universalDetector failed", e);
89                      detectorCharsets[1] = null;
90                  } finally {
91                      if (fis != null) {
92                          try {
93                              fis.close();
94                          } catch (IOException e) {
95                              logger.error("close fis failed", e);
96                          }
97                      }
98                  }
99                  countDownLatch.countDown();
100             }
101         });
102         cpDetectorThread.start();
103         universalDetectorThread.start();
104         try {
105             countDownLatch.await();
106         } catch (InterruptedException e) {
107             logger.error("await InterruptedException");
108             Thread.currentThread().interrupt();
109         }
110         return electBestCharset();
111     }
112 
113     private static String electBestCharset() {
114         StringBuilder sb = new StringBuilder();
115         sb.append("cpDetector: ").append(detectorCharsets[0]).append(", universalDetector: ")
116                 .append(detectorCharsets[1]);
117         if (StringUtils.isEmpty(detectorCharsets[0]) && StringUtils.isEmpty(detectorCharsets[1])) {
118             logger.warn(sb.toString());
119             return null;
120         } else if (StringUtils.isEmpty(detectorCharsets[0]) && StringUtils.isNotEmpty(detectorCharsets[1])) {
121             logger.info(sb.toString());
122             return detectorCharsets[1];
123         } else if (StringUtils.isNotEmpty(detectorCharsets[0]) && StringUtils.isEmpty(detectorCharsets[1])) {
124             logger.info(sb.toString());
125             return detectorCharsets[0];
126         } else if (detectorCharsets[0].equals(detectorCharsets[1])) {
127             logger.info(sb.toString());
128             return detectorCharsets[1];
129         } else {
130             logger.warn(sb.toString());
131             return detectorCharsets[1];
132         }
133 
134     }
135 }